*********************************************
*    POOLING SCOTTS FROM 2001 to 2019       *
*           ----------  			        *
* This do-file pools the individual Scott's *
* files for each year 2001-2019 into a      *
* single file                               *
*********************************************

clear all
set more off

// This do-file relies on four global macros holding directory paths:
//   workpath    - working directory to cd into
//   privatedata - location of the raw yearly scotts`y'.dta files
//   temppath    - where the cleaned yearly files are written
//   outpath     - where the final pooled file is written
// Stop early with a clear message if any of them is missing, instead of
// silently running against an empty path.
foreach g in workpath privatedata temppath outpath {
	if `"${`g'}"' == "" {
		display as error "global macro `g' is not defined; define it before running this do-file"
		exit 198
	}
}

// Working directories
cd "$workpath"

// Locals we need for looping across years and IO directories
// NOTE(review): the file header says 2001-2019, but 2015 and 2019 are not
// in this list -- confirm this is intentional.
local years "2001 2003 2005 2007 2009 2011 2013 2017"


// Clean each yearly file and save a harmonised copy to $temppath
foreach y in `years' {
	use "$privatedata/scotts`y'.dta", clear

	// The 2001 file uses the province code NF; recode it to NL
	if (`y' == 2001) {
		replace province="NL" if province=="NF"
	}

	// From 2013 onwards the product-category and NAICS variables carry
	// different names; rename them to the names used in earlier years
	if (`y' >= 2013) {
		ren productcategory1 productcategories
		ren naicscode1 primarynaicscode
		ren naicscode2 secondarynaicscode1
		ren naicscode3 secondarynaicscode2
		ren naicscode4 secondarynaicscode3
		ren naicscode5 secondarynaicscode4
	}

	keep scottsid companyname primarynaicscode secondarynaicscode* businesstype* squarefootage province productcategories employment estimatedsales exportindicator headofficestatus year*

	// Tag every observation with the year of the file it came from
	gen year = `y'

	// Basic data cleaning
	tostring scottsid primarynaicscode secondarynaicscode*, replace
	destring employment, replace

	// tostring writes numeric missing values as the string "."; turn those
	// back into empty strings so the naics=="" checks below (and the later
	// "drop if naics == """) treat them as missing
	foreach v of varlist primarynaicscode secondarynaicscode* {
		replace `v' = "" if `v' == "."
	}

	// Industry code: primary NAICS, falling back to the first non-empty
	// secondary code, in order 1 to 4
	ren primarynaicscode naics
	replace naics=secondarynaicscode1 if naics==""
	replace naics=secondarynaicscode2 if naics==""
	replace naics=secondarynaicscode3 if naics==""
	replace naics=secondarynaicscode4 if naics==""

	// Business type: first non-empty of businesstype1-3
	gen businesstype=businesstype1
	replace businesstype=businesstype2 if businesstype==""
	replace businesstype=businesstype3 if businesstype==""

	keep scottsid companyname *naics* *businesstype* productcategories employment estimatedsales exportindicator headofficestatus year* province squarefootage
	order scottsid companyname naics productcategories employment estimatedsales exportindicator headofficestatus year* province squarefootage

	compress

	save "$temppath/scotts_`y'.dta", replace
}


// Stack the cleaned yearly files into a single dataset in memory.
// Start from an empty dataset and add one year at a time.
// NOTE: -force- lets append proceed when a variable is string in one file
// and numeric in another; values from the mismatching file are then set to
// missing for that variable.
clear

foreach yr of local years {
	append using "$temppath/scotts_`yr'.dta", force
}

// Final processing of the data

// Keep only the Y/N export flags; any other value is blanked out
replace exportindicator = "" if !inlist(exportindicator, "N", "Y")

// Generate industry variables 2 to 6 digits
tostring naics, replace
drop if naics == ""

// Two-digit sector. Entries whose first two characters are "Se" are text,
// not a numeric code (presumably "Service..." -- verify against the raw
// data), so they are blanked before converting to numeric.
gen naics2digit = substr(naics,1,2)
replace naics2digit = "" if naics2digit == "Se"
destring(naics2digit), replace

*[NOTE] We recode here
// Collapse the multi-code sectors into a single value each
// NOTE(review): 48/49 is mapped to 400 while the others map to 30 and 40
// -- confirm 400 is the intended code.
recode naics2digit (31/33=30) (44/45=40) (48/49=400)

// Three- to six-digit codes are the leading characters of naics. They are
// blanked whenever the two-digit code is missing, so every non-numeric
// entry is handled the same way at all levels and destring can convert
// each variable.
forvalues d = 3/6 {
	gen naics`d'digit = substr(naics,1,`d')
	replace naics`d'digit = "" if naics2digit == .
}

destring naics?d*, replace
qui cap drop _merge

// Keep only plants within Canadian provinces
// (two inlist() calls because inlist() limits the number of string arguments)
keep if inlist(province, "AB", "BC", "MB", "NB", "NL") | inlist(province, "NS", "ON", "PE", "QC", "SK")

// This is the final processed Scott's dataset for 2001-2019

compress

save "$outpath/scotts_global.dta", replace
